# !pip install numpy
# !pip install pandas
# !pip install sklearn
# !pip install qiskit
Image source: https://www.embedded-robotics.com/iris-dataset-classification/
X variables: [Sepal length, Sepal width, Petal length, Petal width] (the length and width of each flower's sepal and petal)
from sklearn.datasets import load_iris
import pandas as pd
import numpy as np
# visualization package
import matplotlib.pyplot as plt
import seaborn as sns
# sample data load
iris = load_iris()
# print out the data with variable types and its description
# print(iris)
# Description of the dataset
print(iris.DESCR)
.. _iris_dataset:
Iris plants dataset
--------------------
**Data Set Characteristics:**
:Number of Instances: 150 (50 in each of three classes)
:Number of Attributes: 4 numeric, predictive attributes and the class
:Attribute Information:
- sepal length in cm
- sepal width in cm
- petal length in cm
- petal width in cm
- class:
- Iris-Setosa
- Iris-Versicolour
- Iris-Virginica
:Summary Statistics:
============== ==== ==== ======= ===== ====================
Min Max Mean SD Class Correlation
============== ==== ==== ======= ===== ====================
sepal length: 4.3 7.9 5.84 0.83 0.7826
sepal width: 2.0 4.4 3.05 0.43 -0.4194
petal length: 1.0 6.9 3.76 1.76 0.9490 (high!)
petal width: 0.1 2.5 1.20 0.76 0.9565 (high!)
============== ==== ==== ======= ===== ====================
:Missing Attribute Values: None
:Class Distribution: 33.3% for each of 3 classes.
:Creator: R.A. Fisher
:Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
:Date: July, 1988
The famous Iris database, first used by Sir R.A. Fisher. The dataset is taken
from Fisher's paper. Note that it's the same as in R, but not as in the UCI
Machine Learning Repository, which has two wrong data points.
This is perhaps the best known database to be found in the
pattern recognition literature. Fisher's paper is a classic in the field and
is referenced frequently to this day. (See Duda & Hart, for example.) The
data set contains 3 classes of 50 instances each, where each class refers to a
type of iris plant. One class is linearly separable from the other 2; the
latter are NOT linearly separable from each other.
.. topic:: References
- Fisher, R.A. "The use of multiple measurements in taxonomic problems"
Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions to
Mathematical Statistics" (John Wiley, NY, 1950).
- Duda, R.O., & Hart, P.E. (1973) Pattern Classification and Scene Analysis.
(Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218.
- Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System
Structure and Classification Rule for Recognition in Partially Exposed
Environments". IEEE Transactions on Pattern Analysis and Machine
Intelligence, Vol. PAMI-2, No. 1, 67-71.
- Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule". IEEE Transactions
on Information Theory, May 1972, 431-433.
- See also: 1988 MLC Proceedings, 54-64. Cheeseman et al"s AUTOCLASS II
conceptual clustering system finds 3 classes in the data.
- Many, many more ...
# Build a data frame that holds the feature_names (X variables) and the target (y variable)
data_iris = pd.DataFrame(data=iris.data, columns=iris.feature_names)
data_iris['species'] = iris.target
# Map the numeric target codes to species names
data_iris['species'] = data_iris['species'].map(
{0: "setosa", 1: "versicolor", 2: "virginica"})
print(data_iris)
# Plot scatter plots and density distribution plots feature-wise WITH labels
sns.set(font_scale=1.5)
sns.pairplot(data_iris, hue="species", height=3)
plt.show()
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \
0 5.1 3.5 1.4 0.2
1 4.9 3.0 1.4 0.2
2 4.7 3.2 1.3 0.2
3 4.6 3.1 1.5 0.2
4 5.0 3.6 1.4 0.2
.. ... ... ... ...
145 6.7 3.0 5.2 2.3
146 6.3 2.5 5.0 1.9
147 6.5 3.0 5.2 2.0
148 6.2 3.4 5.4 2.3
149 5.9 3.0 5.1 1.8
species
0 setosa
1 setosa
2 setosa
3 setosa
4 setosa
.. ...
145 virginica
146 virginica
147 virginica
148 virginica
149 virginica
[150 rows x 5 columns]
# Plot scatter plots and density distribution plots feature-wise WITHOUT any labels
sns.set(font_scale=1.5)
sns.pairplot(data_iris, height=3)
plt.show()
# Descriptive statistics of the dataset
data_iris.describe()
| | sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) |
|---|---|---|---|---|
| count | 150.000000 | 150.000000 | 150.000000 | 150.000000 |
| mean | 5.843333 | 3.057333 | 3.758000 | 1.199333 |
| std | 0.828066 | 0.435866 | 1.765298 | 0.762238 |
| min | 4.300000 | 2.000000 | 1.000000 | 0.100000 |
| 25% | 5.100000 | 2.800000 | 1.600000 | 0.300000 |
| 50% | 5.800000 | 3.000000 | 4.350000 | 1.300000 |
| 75% | 6.400000 | 3.300000 | 5.100000 | 1.800000 |
| max | 7.900000 | 4.400000 | 6.900000 | 2.500000 |
data_iris['species'].value_counts()
# pd.crosstab(index=data_iris['species'], columns="count")
setosa        50
versicolor    50
virginica     50
Name: species, dtype: int64
# Calculate the Euclidean norm (distance from the origin) of each data point
iris_euclidean_dist = np.linalg.norm(data_iris.iloc[:, 0:4].values, axis=1)
iris_euclidean_dist
array([ 6.34507683, 5.91692488, 5.83609458, 5.7497826 , 6.32139225,
6.88621812, 5.8966092 , 6.23297682, 5.45618915, 5.98999165,
6.71863081, 6.09918027, 5.83180932, 5.35817133, 7.14982517,
7.36613874, 6.79852925, 6.34901567, 7.06470098, 6.54140658,
6.60681466, 6.48922183, 5.92958683, 6.32771681, 6.18465844,
6.04979338, 6.26737585, 6.44825558, 6.37181293, 5.91016074,
5.93717104, 6.56734345, 6.79043445, 7.06328535, 5.99249531,
6.05970296, 6.65056389, 6.2401923 , 5.48543526, 6.31347765,
6.24739946, 5.22685374, 5.59732079, 6.33798075, 6.64981203,
5.83866423, 6.56124988, 5.77927331, 6.63852393, 6.15548536,
9.12633552, 8.58487041, 9.13673902, 7.29588925, 8.5732141 ,
7.89113427, 8.67352293, 6.45445583, 8.64985549, 7.17635005,
6.5 , 7.98122798, 7.60526134, 8.3468557 , 7.37699126,
8.70746806, 7.92842986, 7.6642025 , 8.11048704, 7.35051019,
8.44570897, 7.92085854, 8.49705831, 8.28130425, 8.33966426,
8.59534758, 8.89269363, 9.04322951, 8.1798533 , 7.24568837,
7.18748913, 7.12039325, 7.58814865, 8.47702778, 7.78845299,
8.38868285, 8.87918915, 8.12588457, 7.67202711, 7.36138574,
7.60328876, 8.32646384, 7.60526134, 6.49461315, 7.61445993,
7.78267306, 7.76079893, 8.18718511, 6.5169011 , 7.67007171,
9.63483264, 8.39940474, 9.93126377, 9.09395404, 9.47259204,
10.71120908, 7.30753036, 10.22888068, 9.38189746, 10.40480658,
9.08295106, 8.94147639, 9.48156105, 8.23043134, 8.55862138,
9.19673855, 9.20543318, 11.11125555, 10.90642013, 8.2516665 ,
9.77905926, 8.19817053, 10.77125805, 8.61568337, 9.62704524,
10.06578363, 8.51821578, 8.57088093, 9.19619487, 9.85088828,
10.16956243, 11.03675677, 9.21954446, 8.70574523, 8.79147314,
10.52568288, 9.4005319 , 9.16842407, 8.44274837, 9.52837867,
9.57183368, 9.40850679, 8.39940474, 9.8275124 , 9.72213968,
9.28547252, 8.63423419, 9.07138358, 9.18966811, 8.54751426])
# Create new columns for the Euclidean distance and its square
data_iris['Euclid_dist'] = iris_euclidean_dist
data_iris['Euclid_dist_sq'] = iris_euclidean_dist**2
# # Function that calculates Mahalanobis distance
# def mahalanobis(x=None, data=None, cov=None):
# x_mu = x - x.mean()
# if not cov:
# cov = np.cov(data.values.T)
# inv_covmat = np.linalg.inv(cov)
# left = np.dot(x_mu, inv_covmat)
# mahal = np.dot(left, x_mu.T)
# return mahal.diagonal()
# # Calculate the Mahalanobis distance of the data
# Mahal_dist = mahalanobis(x=data_iris.iloc[:,range(4)], data=data_iris.iloc[:,range(4)])
# Mahal_dist
# # Create new column of Mahalanobis distance
# data_iris['Mahal_dist'] = Mahal_dist
# data_iris['Mahal_dist_sq'] = Mahal_dist**2
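For reference, a minimal working sketch of the Mahalanobis distance is given below; it is an illustration only (not part of the original run) and assumes the covariance is estimated from the same four feature columns.
# Sketch: squared Mahalanobis distance of each sample to the feature mean
X_feat = data_iris.iloc[:, 0:4].values
mu = X_feat.mean(axis=0)
cov_inv = np.linalg.inv(np.cov(X_feat, rowvar=False))
diff = X_feat - mu
mahal_sq = np.einsum('ij,jk,ik->i', diff, cov_inv, diff)  # row-wise (x - mu)^T S^-1 (x - mu)
# data_iris['Mahal_dist'] = np.sqrt(mahal_sq)  # uncomment to mirror the Euclidean columns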
data_iris[['species', 'Euclid_dist', 'Euclid_dist_sq']]
# data_iris[['species', 'Euclid_dist','Euclid_dist_sq','Mahal_dist', 'Mahal_dist_sq']]
| | species | Euclid_dist | Euclid_dist_sq |
|---|---|---|---|
| 0 | setosa | 6.345077 | 40.26 |
| 1 | setosa | 5.916925 | 35.01 |
| 2 | setosa | 5.836095 | 34.06 |
| 3 | setosa | 5.749783 | 33.06 |
| 4 | setosa | 6.321392 | 39.96 |
| ... | ... | ... | ... |
| 145 | virginica | 9.285473 | 86.22 |
| 146 | virginica | 8.634234 | 74.55 |
| 147 | virginica | 9.071384 | 82.29 |
| 148 | virginica | 9.189668 | 84.45 |
| 149 | virginica | 8.547514 | 73.06 |
150 rows × 3 columns
# Plot scatter plots and density distribution plots feature-wise WITH labels
sns.set(font_scale=1.5)
sns.pairplot(data_iris, hue="species", height=3)
plt.show()
# Plot scatter plots and density distribution plots feature-wise WITHOUT any labels
sns.set(font_scale=1.5)
sns.pairplot(data_iris, height=3)
plt.show()
sns.pairplot(data_iris[['species', 'Euclid_dist',
'Euclid_dist_sq']], hue="species", height=3)
# sns.pairplot(data_iris[['species', 'Euclid_dist', 'Euclid_dist_sq', 'Mahal_dist','Mahal_dist_sq']], hue="species", height=3)
<seaborn.axisgrid.PairGrid at 0x16c4a28a9a0>
sns.pairplot(data_iris[['species', 'Euclid_dist', 'Euclid_dist_sq']], height=3)
# sns.pairplot(data_iris[['species', 'Euclid_dist', 'Euclid_dist_sq', 'Mahal_dist','Mahal_dist_sq']], height=3)
<seaborn.axisgrid.PairGrid at 0x16c4982cc40>
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
def draw_graph(G, col, pos):
plt.figure(figsize=(12, 8))
default_axes = plt.axes(frameon=True)
nx.draw_networkx(G, node_color=col, node_size=600,
alpha=0.8, ax=default_axes, pos=pos, font_size=16)
edge_labels = nx.get_edge_attributes(G, 'weight')
nx.draw_networkx_edge_labels(
G, pos=pos, edge_labels=edge_labels, font_size=16)
n = 6 # number of nodes in graph
np.random.seed(150)
edge_weights = np.random.randint(1, 5, size=(n, n))
edge_weights = edge_weights * edge_weights.T / 2  # symmetrize: element-wise product with the transpose, halved
G = nx.Graph()
G.add_nodes_from(np.arange(0, n, 1))
for i in range(n):
for j in range(n):
if i > j:
G.add_edge(i, j, weight=edge_weights[i, j])
colors = ['g' for node in G.nodes()]
pos = nx.spring_layout(G)
# graph G: nodes
G.nodes
NodeView((0, 1, 2, 3, 4, 5))
# graph G: edges
G.edges
EdgeView([(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (1, 2), (1, 3), (1, 4), (1, 5), (2, 3), (2, 4), (2, 5), (3, 4), (3, 5), (4, 5)])
# graph G: edges with weights
G.edges.data()
EdgeDataView([(0, 1, {'weight': 1.5}), (0, 2, {'weight': 6.0}), (0, 3, {'weight': 1.0}), (0, 4, {'weight': 0.5}), (0, 5, {'weight': 3.0}), (1, 2, {'weight': 6.0}), (1, 3, {'weight': 3.0}), (1, 4, {'weight': 2.0}), (1, 5, {'weight': 2.0}), (2, 3, {'weight': 0.5}), (2, 4, {'weight': 4.0}), (2, 5, {'weight': 4.0}), (3, 4, {'weight': 2.0}), (3, 5, {'weight': 4.0}), (4, 5, {'weight': 6.0})])
# Plot of the given graph G
draw_graph(G, colors, pos)
# Adjacency matrix of weighted graph
w = np.zeros([n, n])
for i in range(n):
for j in range(n):
temp = G.get_edge_data(i, j, default=0)
if temp != 0:
w[i, j] = temp['weight']
w
array([[0. , 1.5, 6. , 1. , 0.5, 3. ],
[1.5, 0. , 6. , 3. , 2. , 2. ],
[6. , 6. , 0. , 0.5, 4. , 4. ],
[1. , 3. , 0.5, 0. , 2. , 4. ],
[0.5, 2. , 4. , 2. , 0. , 6. ],
[3. , 2. , 4. , 4. , 6. , 0. ]])
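As a quick cross-check (a sketch, not in the original notebook), the same weighted adjacency matrix can be read directly from networkx:
print(np.allclose(w, nx.to_numpy_array(G)))  # expected: True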
best_cost_brute = 0
for b in range(2**n):
x = [int(t) for t in reversed(list(bin(b)[2:].zfill(n)))]
cost = 0
for i in range(n):
for j in range(n):
cost = cost + w[i, j] * x[i] * (1-x[j])
if best_cost_brute < cost:
best_cost_brute = cost
xbest_brute = x
print('case =', '%-20s' % str(x), ' cost =', '%-6s' %
str(cost), ' try =', str(b+1))
colors_brute = ['g' if xbest_brute[i] == 0 else 'c' for i in range(n)]
print('\nBest case(solution) =', '%-20s' %
str(xbest_brute), ' cost =', '%-6s' % str(best_cost_brute))
case = [0, 0, 0, 0, 0, 0]   cost = 0.0    try = 1
case = [1, 0, 0, 0, 0, 0]   cost = 12.0   try = 2
case = [0, 1, 0, 0, 0, 0]   cost = 14.5   try = 3
...
case = [0, 1, 1, 1, 1, 1]   cost = 12.0   try = 63
case = [1, 1, 1, 1, 1, 1]   cost = 0.0    try = 64
Best case(solution) = [1, 1, 0, 0, 1, 0]   cost = 33.0
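The cut weight of a partition x can also be written as the vectorized expression x·W·(1-x); the following sketch (not part of the original run) checks it against the brute-force optimum.
x_vec = np.array(xbest_brute)
print(x_vec @ w @ (1 - x_vec))  # should equal best_cost_brute (33.0 here)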
draw_graph(G, colors_brute, pos)
from qiskit import QuantumCircuit, Aer
from qiskit.circuit import Parameter
def maxcut_obj(solution, graph):
obj = 0
for i, j in graph.edges():
if solution[i] != solution[j]:
obj -= 1 * w[i][j]
    return obj  # negative cut weight (value of the cost Hamiltonian, to be minimized)
def compute_expectation(counts, graph):
avg = 0
sum_count = 0
for bit_string, count in counts.items():
obj = maxcut_obj(bit_string, graph)
avg += obj * count
        sum_count += count  # sum_count accumulates the total number of shots
return avg/sum_count # minimize this function
def create_qaoa_circ(graph, theta):
nqubits = len(graph.nodes())
n_layers = len(theta)//2
beta = theta[:n_layers]
gamma = theta[n_layers:]
qc = QuantumCircuit(nqubits)
qc.h(range(nqubits))
for layer_index in range(n_layers):
for pair in list(graph.edges()):
qc.rzz(2 * gamma[layer_index] * w[pair[0]]
[pair[1]], pair[0], pair[1])
for qubit in range(nqubits):
qc.rx(2 * beta[layer_index], qubit)
qc.measure_all()
return qc
def get_expectation(graph, shots=512):
backend = Aer.get_backend('qasm_simulator')
backend.shots = shots
def execute_circ(theta):
qc = create_qaoa_circ(graph, theta)
        counts = backend.run(qc, seed_simulator=10, shots=shots).result().get_counts()
return compute_expectation(counts, graph)
return execute_circ
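A quick sanity check of the helpers above (a sketch, not part of the original run): an all-zeros assignment cuts no edges, and a single-layer circuit takes two parameters.
print(maxcut_obj('000000', G))            # no edge is cut, so the objective is 0
toy_qc = create_qaoa_circ(G, [0.8, 0.4])  # one layer: theta = [beta_1, gamma_1]
print(toy_qc.num_qubits, toy_qc.depth())  # 6 qubits; depth of the H / RZZ / RX / measure layers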
from scipy.optimize import minimize
expectation = get_expectation(G)
p = 1 # 32 64
res = minimize(expectation, np.ones(p*2)*np.pi/2,
method='COBYLA', options={'maxiter': 2500})
res
fun: -24.17529296875
maxcv: 0.0
message: 'Optimization terminated successfully.'
nfev: 31
status: 1
success: True
x: array([2.57910398, 1.5991255 ])
from qiskit.visualization import plot_histogram
backend = Aer.get_backend('aer_simulator')
backend.shots = 2**14
qc_res = create_qaoa_circ(G, res.x)
counts = backend.run(qc_res, seed_simulator=10).result().get_counts()
plot_histogram(counts, figsize=(20, 5), title='512 Shots')
# Plot the Quantum Circuit of QAOA
# qc_res.draw(output='mpl', plot_barriers=True).savefig('QAOA_circuit.png', dpi=720)
qc_res.draw(output='mpl', plot_barriers=True)
# qc_res.draw(output='mpl', plot_barriers=True, fold=-1).savefig('QAOA_circuit(full).png', dpi=720)
qc_res.draw(output='mpl', plot_barriers=True, fold=-1)
str(counts)
"{'001001': 54, '100001': 23, '001010': 15, '111111': 3, '101001': 79, '111010': 82, '001000': 16, '101000': 30, '011011': 22, '011110': 27, '000001': 2, '110110': 67, '010111': 23, '101100': 1, '000011': 6, '011111': 8, '110010': 16, '101110': 2, '000101': 67, '010001': 5, '001110': 4, '111011': 21, '100101': 65, '010110': 75, '110011': 3, '000100': 16, '101101': 8, '000111': 8, '100100': 25, '111100': 5, '001101': 34, '010010': 9, '101111': 5, '000110': 4, '110001': 13, '011010': 62, '100000': 8, '000000': 6, '110111': 12, '001100': 2, '100011': 12, '100111': 13, '011000': 9, '111000': 6, '000010': 3, '110101': 16, '101011': 2, '111001': 10, '010000': 5, '001011': 1, '111110': 3, '010011': 4, '010100': 1, '011100': 5, '010101': 1}"
# Sort the counted shot results
{k: v for k, v in sorted(counts.items(), key=lambda item: item[1])}
{'101100': 1,
'001011': 1,
'010100': 1,
'010101': 1,
'000001': 2,
'101110': 2,
'001100': 2,
'101011': 2,
'111111': 3,
'110011': 3,
'000010': 3,
'111110': 3,
'001110': 4,
'000110': 4,
'010011': 4,
'010001': 5,
'111100': 5,
'101111': 5,
'010000': 5,
'011100': 5,
'000011': 6,
'000000': 6,
'111000': 6,
'011111': 8,
'101101': 8,
'000111': 8,
'100000': 8,
'010010': 9,
'011000': 9,
'111001': 10,
'110111': 12,
'100011': 12,
'110001': 13,
'100111': 13,
'001010': 15,
'001000': 16,
'110010': 16,
'000100': 16,
'110101': 16,
'111011': 21,
'011011': 22,
'100001': 23,
'010111': 23,
'100100': 25,
'011110': 27,
'101000': 30,
'001101': 34,
'001001': 54,
'011010': 62,
'100101': 65,
'110110': 67,
'000101': 67,
'010110': 75,
'101001': 79,
'111010': 82}
result_col = list(map(int, list(max(counts, key=counts.get))))
result_colors = ['g' if result_col[i] == 0 else 'c' for i in range(n)]
# Result of Brute Force algorithm
draw_graph(G, colors_brute, pos)
# Result of QAOA
draw_graph(G, result_colors, pos)
print('xbest_brute :', xbest_brute)
print('QAOA :', result_col)
xbest_brute : [1, 1, 0, 0, 1, 0]
QAOA : [1, 1, 1, 0, 1, 0]
from tqdm import tqdm
p = 16
res = []
for i in tqdm(range(1, p+1)):
res.append(minimize(expectation, np.ones(i*2)*np.pi/2,
method='COBYLA', options={'maxiter': 2500}))
100%|██████████| 16/16 [00:22<00:00, 1.39s/it]
res[0:2]
[ fun: -24.17529296875
maxcv: 0.0
message: 'Optimization terminated successfully.'
nfev: 31
status: 1
success: True
x: array([2.57910398, 1.5991255 ]),
fun: -24.716796875
maxcv: 0.0
message: 'Optimization terminated successfully.'
nfev: 54
status: 1
success: True
x: array([2.52461869, 1.63875349, 1.61624568, 1.52923708])]
approx = []
for i in range(p):
approx.append(-res[i].fun/best_cost_brute)
x = np.arange(1, p+1, 1)
plt.figure(figsize=(8, 6))
plt.plot(x, approx, marker='o', markersize=6, c='k', linestyle='--')
plt.ylim((0.5, 1))
plt.xlim(0, p)
plt.xlabel('p')
plt.ylabel('Approximation')
plt.grid(True)
plt.show()
best_p = np.argmax(approx)
print("The best p(iterationo number) is", best_p)
The best p(iterationo number) is 11
res[best_p].x
array([2.69931677, 1.70145888, 1.83501131, 1.52640593, 1.70235575,
1.52533461, 1.53010659, 1.94876412, 1.40228443, 1.54544357,
1.47815615, 1.48456692, 1.58893494, 1.57102304, 1.58103879,
1.68907833, 1.40984573, 1.6220806 , 1.63468791, 1.62148146,
1.52878007, 1.70463034, 1.58593247, 1.60855217])
from qiskit.visualization import plot_histogram
backend = Aer.get_backend('aer_simulator')
backend.shots = 512
qc_res = create_qaoa_circ(G, res[best_p].x)
counts = backend.run(qc_res, seed_simulator=10).result().get_counts()
plot_histogram(counts, figsize=(20, 5), title='512 Shots')
result_col = list(map(int, list(max(counts, key=counts.get))))
result_colors = ['g' if result_col[i] == 0 else 'c' for i in range(n)]
print('xbest_brute :', xbest_brute)
print('QAOA :', result_col)
xbest_brute : [1, 1, 0, 0, 1, 0]
QAOA : [1, 1, 0, 0, 1, 0]
draw_graph(G, colors_brute, pos)
draw_graph(G, result_colors, pos)
print('Best solution - Brute Force : ' +
str(xbest_brute) + ', cost = ' + str(best_cost_brute))
print('Best solution - QAOA : ' + str(result_col) +
', cost = ' + str(-res[best_p].fun))
Best solution - Brute Force : [1, 1, 0, 0, 1, 0], cost = 33.0
Best solution - QAOA : [1, 1, 0, 0, 1, 0], cost = 28.9130859375
data_iris
| | sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | species | Euclid_dist | Euclid_dist_sq |
|---|---|---|---|---|---|---|---|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa | 6.345077 | 40.26 |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa | 5.916925 | 35.01 |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa | 5.836095 | 34.06 |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa | 5.749783 | 33.06 |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa | 6.321392 | 39.96 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 145 | 6.7 | 3.0 | 5.2 | 2.3 | virginica | 9.285473 | 86.22 |
| 146 | 6.3 | 2.5 | 5.0 | 1.9 | virginica | 8.634234 | 74.55 |
| 147 | 6.5 | 3.0 | 5.2 | 2.0 | virginica | 9.071384 | 82.29 |
| 148 | 6.2 | 3.4 | 5.4 | 2.3 | virginica | 9.189668 | 84.45 |
| 149 | 5.9 | 3.0 | 5.1 | 1.8 | virginica | 8.547514 | 73.06 |
150 rows × 7 columns
num_sample1 = 4
num_sample2 = 4
sample_df1 = data_iris[data_iris['species'] ==
'setosa'].sample(num_sample1).sort_index()
# sample_df2 = data_iris[data_iris['species'] =='versicolor'].sample(num_sample).sort_index()
sample_df3 = data_iris[data_iris['species'] ==
'virginica'].sample(num_sample2).sort_index()
# data_iris_sample = pd.concat([sample_df1, sample_df2, sample_df3])
data_iris_sample = pd.concat([sample_df1, sample_df3])
data_iris_sample
| | sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | species | Euclid_dist | Euclid_dist_sq |
|---|---|---|---|---|---|---|---|
| 14 | 5.8 | 4.0 | 1.2 | 0.2 | setosa | 7.149825 | 51.12 |
| 15 | 5.7 | 4.4 | 1.5 | 0.4 | setosa | 7.366139 | 54.26 |
| 27 | 5.2 | 3.5 | 1.5 | 0.2 | setosa | 6.448256 | 41.58 |
| 33 | 5.5 | 4.2 | 1.4 | 0.2 | setosa | 7.063285 | 49.89 |
| 114 | 5.8 | 2.8 | 5.1 | 2.4 | virginica | 8.558621 | 73.25 |
| 125 | 7.2 | 3.2 | 6.0 | 1.8 | virginica | 10.065784 | 101.32 |
| 140 | 6.7 | 3.1 | 5.6 | 2.4 | virginica | 9.571834 | 91.62 |
| 141 | 6.9 | 3.1 | 5.1 | 2.3 | virginica | 9.408507 | 88.52 |
data_iris_qaoa = data_iris_sample[[
'sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']]
data_iris_qaoa = np.array(data_iris_qaoa)
data_iris_qaoa_label = iris.target[data_iris_sample.index]
data_iris_qaoa
array([[5.8, 4. , 1.2, 0.2],
[5.7, 4.4, 1.5, 0.4],
[5.2, 3.5, 1.5, 0.2],
[5.5, 4.2, 1.4, 0.2],
[5.8, 2.8, 5.1, 2.4],
[7.2, 3.2, 6. , 1.8],
[6.7, 3.1, 5.6, 2.4],
[6.9, 3.1, 5.1, 2.3]])
data_iris_qaoa_label
array([0, 0, 0, 0, 2, 2, 2, 2])
len(data_iris_qaoa_label)
8
# Function to calculate the Euclidean distance between two given data points
import math
def dist(a, b):
"Euclidean dist between two lists"
d = np.linalg.norm(np.array(a) - np.array(b), axis=0)
return round(d, 4)
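A quick usage example (a sketch): the distance between the first setosa row and the first virginica row of the sampled subset, which should match the corresponding entry of the adjacency matrix built below.
print(dist(data_iris_qaoa[0], data_iris_qaoa[4]))  # ~4.6357, i.e. w[0, 4] below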
import random
# Set the number of nodes, the edge connections, and their weights for the graph
n = len(data_iris_qaoa_label)
data = data_iris_qaoa
label = data_iris_qaoa_label
datapoints = data.tolist()
print("Data points:", datapoints)
labels = label
print("Data labels:", labels)
G = nx.Graph()
G.add_nodes_from(np.arange(0, n, 1))
for i in range(n):
for j in range(n):
if i > j:
G.add_edge(i, j, weight=dist(datapoints[i], datapoints[j]))
colors = ['g' for node in G.nodes()]
pos = nx.spring_layout(G)
Data points: [[5.8, 4.0, 1.2, 0.2], [5.7, 4.4, 1.5, 0.4], [5.2, 3.5, 1.5, 0.2], [5.5, 4.2, 1.4, 0.2], [5.8, 2.8, 5.1, 2.4], [7.2, 3.2, 6.0, 1.8], [6.7, 3.1, 5.6, 2.4], [6.9, 3.1, 5.1, 2.3]]
Data labels: [0 0 0 0 2 2 2 2]
draw_graph(G, colors, pos)
# Calculate Adjacency matrix of the given Graph
w = np.zeros([n, n])
for i in range(n):
for j in range(n):
temp = G.get_edge_data(i, j, default=0)
if temp != 0:
w[i, j] = temp['weight']
w
array([[0. , 0.5477, 0.8367, 0.4123, 4.6357, 5.3104, 5.0813, 4.6519],
[0.5477, 0. , 1.0488, 0.3606, 4.4193, 5.0892, 4.8477, 4.4385],
[0.8367, 1.0488, 0. , 0.7681, 4.3186, 5.1865, 4.9051, 4.5188],
[0.4123, 0.3606, 0.7681, 0. , 4.5365, 5.2545, 5.013 , 4.6119],
[4.6357, 4.4193, 4.3186, 4.5365, 0. , 1.8138, 1.0724, 1.1446],
[5.3104, 5.0892, 5.1865, 5.2545, 1.8138, 0. , 0.8832, 1.077 ],
[5.0813, 4.8477, 4.9051, 5.013 , 1.0724, 0.8832, 0. , 0.5477],
[4.6519, 4.4385, 4.5188, 4.6119, 1.1446, 1.077 , 0.5477, 0. ]])
best_cost_brute = 0
for b in range(2**n):
x = [int(t) for t in reversed(list(bin(b)[2:].zfill(n)))]
cost = 0
for i in range(n):
for j in range(n):
cost = cost + w[i, j] * x[i] * (1-x[j])
if best_cost_brute < cost:
best_cost_brute = cost
xbest_brute = x
print('case =', '%-30s' % str(x), ' cost =', '%-24s' %
str(cost), 'try =', str(b+1))
colors_brute = ['g' if xbest_brute[i] == 0 else 'c' for i in range(n)]
print('\nBest case(solution) =', '%-30s' %
str(xbest_brute), ' cost =', '%-24s' % str(best_cost_brute))
case = [0, 0, 0, 0, 0, 0, 0, 0]   cost = 0.0       try = 1
case = [1, 0, 0, 0, 0, 0, 0, 0]   cost = 21.476    try = 2
case = [0, 1, 0, 0, 0, 0, 0, 0]   cost = 20.7518   try = 3
...
case = [0, 1, 1, 1, 1, 1, 1, 1]   cost = 21.476    try = 255
case = [1, 1, 1, 1, 1, 1, 1, 1]   cost = 0.0       try = 256
Best case(solution) = [1, 1, 1, 1, 0, 0, 0, 0]   cost = 76.8189
draw_graph(G, colors_brute, pos)
from scipy.optimize import minimize
from tqdm import tqdm
expectation = get_expectation(G)
p = 64
res = []
for i in tqdm(range(1, p+1)):
res.append(minimize(expectation, np.ones(i*2)*np.pi/2,
method='COBYLA', options={'maxiter': 2500}))
100%|██████████| 64/64 [27:48<00:00, 26.08s/it]
approx = []
for i in range(p):
approx.append(-res[i].fun/best_cost_brute)
x = np.arange(1, p+1, 1)
plt.plot(x, approx, marker='+', c='k', linestyle='--')
plt.ylim((0.5, 1))
plt.xlim(0, p+1)
plt.xlabel('p')
plt.ylabel('Approximation')
plt.grid(True)
plt.show()
best_p = np.argmax(approx)
print("The best p(iterationo number) is", best_p)
The best p(iterationo number) is 61
res[best_p].x
array([2.56684494, 1.56483545, 1.5621608 , 1.57836476, 1.57865107,
1.57720481, 1.58521484, 1.55353739, 1.58867437, 1.57171891,
1.55781865, 1.55654356, 1.57976623, 1.56494557, 1.04496096,
1.42141525, 1.57460558, 1.73911607, 1.3983032 , 1.59075105,
1.40641875, 1.58193399, 1.58353215, 1.53366763, 1.48391421,
1.7611838 , 1.66661049, 1.58522614, 1.57536247, 1.50840711,
1.52984813, 1.58244461, 1.58886206, 1.55386512, 1.65116939,
1.66743861, 1.66443053, 1.62789976, 1.58128019, 1.5784145 ,
1.64867114, 1.65663223, 1.58219872, 1.59280698, 1.5851417 ,
1.58586683, 1.58213626, 1.64671377, 1.59275626, 1.50361299,
2.6081778 , 1.54312801, 1.56965811, 1.57676506, 1.58781477,
1.58042926, 1.56107681, 1.56479734, 1.55714685, 1.55217294,
1.57405568, 1.56906732, 1.56724253, 1.66693597, 1.46699857,
1.50473237, 1.62784875, 1.59999419, 1.53255567, 1.76043189,
1.37016888, 1.57265542, 1.56742007, 1.58288699, 1.55317191,
1.56766415, 1.567071 , 1.55838861, 1.57752088, 1.57328378,
1.56124554, 1.56628699, 1.56685456, 1.7459759 , 1.38952252,
1.74922353, 1.38905124, 1.5664524 , 1.56880225, 1.58090604,
1.55410481, 1.5467613 , 1.59281826, 1.58663764, 1.54808424,
1.56906742, 1.56868147, 1.56435594, 1.57038934, 1.56878319,
1.56625478, 1.5678789 , 1.56801579, 1.56680874, 1.5691402 ,
1.74404436, 1.39120431, 1.57306748, 1.56328153, 1.74250522,
1.39174326, 1.70324979, 1.43089726, 1.63028841, 1.585895 ,
1.62246736, 1.53731088, 1.5009015 , 1.52762615, 1.46866889,
1.54763462, 1.61891966, 1.54241216, 1.53072289])
from qiskit.visualization import plot_histogram
backend = Aer.get_backend('aer_simulator')
backend.shots = 512
qc_res = create_qaoa_circ(G, res[best_p].x)
counts = backend.run(qc_res, seed_simulator=10).result().get_counts()
plot_histogram(counts, figsize=(32, 5), title='512 Shots')
# # Plot the Quantum Circuit of QAOA
# qc_res.draw(output='mpl', plot_barriers=True).savefig('QAOA_circuit1.png', dpi=480)
# # qc_res.draw(output='mpl', plot_barriers=True)
# qc_res.draw(output='mpl', plot_barriers=True, fold=-1).savefig('QAOA_circuit(full).png', dpi=720)
# qc_res.draw(output='mpl', plot_barriers=True, fold=-1)
xbest_qaoa = list(map(int, list(max(counts, key=counts.get))))
colors_qaoa = ['g' if xbest_qaoa[i] == 0 else 'c' for i in range(n)]
print('xbest_brute :', xbest_brute)
print('QAOA :', xbest_qaoa)
xbest_brute : [1, 1, 1, 1, 0, 0, 0, 0]
QAOA : [1, 1, 1, 1, 0, 0, 0, 0]
draw_graph(G, colors_brute, pos)
draw_graph(G, colors_qaoa, pos)
print('Best solution - Brute Force : ' +
str(xbest_brute) + ', cost = ' + str(best_cost_brute))
print('Best solution - QAOA : ' + str(xbest_qaoa) +
', cost = ' + str(-res[best_p].fun))
Best solution - Brute Force : [1, 1, 1, 1, 0, 0, 0, 0], cost = 76.8189
Best solution - QAOA : [1, 1, 1, 1, 0, 0, 0, 0], cost = 52.24974521484376
# Visualising the clusters
x = data_iris_qaoa
y = np.array(xbest_brute)
plt.figure(figsize=(12, 8))
plt.scatter(x[y == 0, 0], x[y == 0, 1], s=100, c='purple', label='Cluster A')
plt.scatter(x[y == 1, 0], x[y == 1, 1], s=100, c='orange', label='Cluster B')
plt.title('Clustering using Brute-Force')
plt.xlabel('sepal length (cm)')
plt.ylabel('sepal width (cm)')
plt.legend(title="Species", loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()
# Plotting the centroids of the clusters
# plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=100, c='red', label='Centroids')
# plt.legend(title="Species", loc='center left', bbox_to_anchor=(1, 0.5))
# Visualising the clusters
x = data_iris_qaoa
y = np.array(xbest_qaoa)
plt.figure(figsize=(12, 8))
plt.scatter(x[y == 0, 0], x[y == 0, 1], s=100, c='purple', label='Cluster A')
plt.scatter(x[y == 1, 0], x[y == 1, 1], s=100, c='orange', label='Cluster B')
plt.title('Clustering using QAOA')
plt.xlabel('sepal length (cm)')
plt.ylabel('sepal width (cm)')
plt.legend(title="Species", loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()
# Plotting the centroids of the clusters
# plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=100, c='red', label='Centroids')
# plt.legend(title="Species", loc='center left', bbox_to_anchor=(1, 0.5))
# Visualising the clusters
x = data_iris_qaoa
y = np.array(data_iris_qaoa_label)
plt.figure(figsize=(12, 8))
plt.scatter(x[y == 0, 0], x[y == 0, 1], s=100,
c='purple', label='Cluster A(setosa)')
plt.scatter(x[y == 1, 0], x[y == 1, 1], s=100,
            c='orange', label='Cluster B(versicolor)')  # empty here: the sampled subset contains no versicolor
plt.scatter(x[y == 2, 0], x[y == 2, 1], s=100,
c='orange', label='Cluster B(virginica)')
plt.title('Clustering with True labels')
plt.xlabel('sepal length (cm)')
plt.ylabel('sepal width (cm)')
plt.legend(title="Species", loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()
# Plotting the centroids of the clusters
# plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=100, c='red', label='Centroids')
# plt.legend(title="Species", loc='center left', bbox_to_anchor=(1, 0.5))
import os
os.environ["OMP_NUM_THREADS"] = '1'
# Finding the optimum number of clusters for k-means classification
from sklearn.cluster import KMeans
x = data_iris_sample.iloc[:, 0:4]
x
| | sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) |
|---|---|---|---|---|
| 14 | 5.8 | 4.0 | 1.2 | 0.2 |
| 15 | 5.7 | 4.4 | 1.5 | 0.4 |
| 27 | 5.2 | 3.5 | 1.5 | 0.2 |
| 33 | 5.5 | 4.2 | 1.4 | 0.2 |
| 114 | 5.8 | 2.8 | 5.1 | 2.4 |
| 125 | 7.2 | 3.2 | 6.0 | 1.8 |
| 140 | 6.7 | 3.1 | 5.6 | 2.4 |
| 141 | 6.9 | 3.1 | 5.1 | 2.3 |
# Finding the optimum number of clusters for k-means classification
wcss = []
max_num_cluster = len(data_iris_sample)
for i in range(1, max_num_cluster):
kmeans = KMeans(n_clusters=i, init='k-means++',
max_iter=300, n_init=10, random_state=0)
kmeans.fit(x)
wcss.append(kmeans.inertia_)
plt.plot(range(1, max_num_cluster), wcss)
plt.xticks(np.arange(1, max_num_cluster, step=1)) # Set label locations.
plt.title('The elbow method')
plt.xlabel('Number of clusters')
plt.ylabel('Within Cluster Sum of Squares') # within cluster sum of squares
plt.show()
C:\Users\user1\anaconda3\lib\site-packages\sklearn\cluster\_kmeans.py:1036: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1. warnings.warn(
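Besides the elbow plot, silhouette scores give another hint for choosing k; the loop below is a sketch (not part of the original analysis) that uses sklearn's silhouette_score on the same 8-sample subset.
from sklearn.metrics import silhouette_score
for k in range(2, 5):
    km = KMeans(n_clusters=k, init='k-means++', n_init=10, random_state=0).fit(x)
    print(k, silhouette_score(x, km.labels_))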
num_cluster = 2
kmeans = KMeans(n_clusters=num_cluster, init='k-means++',
max_iter=300, n_init=10, random_state=0)
y_kmeans = kmeans.fit_predict(x)
x.iloc[0:10, :]
y_kmeans
array([1, 1, 1, 1, 0, 0, 0, 0])
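The K-means partition should agree with the MaxCut partition found above up to a swap of the two cluster labels; a quick check (a sketch, assuming xbest_brute from the earlier cell):
agree = np.mean(y_kmeans == np.array(xbest_brute))
print(max(agree, 1 - agree))  # 1.0 means the clusterings are identical up to relabelling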
# Visualising the clusters
plt.figure(figsize=(12, 8))
plt.scatter(x.iloc[y_kmeans == 0, 0], x.iloc[y_kmeans ==
0, 1], s=100, c='purple', label='Cluster A')
plt.scatter(x.iloc[y_kmeans == 1, 0], x.iloc[y_kmeans ==
1, 1], s=100, c='orange', label='Cluster B')
# plt.scatter(x.iloc[y_kmeans == 2, 0], x.iloc[y_kmeans == 2, 1], s=100, c='green', label='Cluster C')
plt.title('Clustering with K-Means')
plt.xlabel('sepal length (cm)')
plt.ylabel('sepal width (cm)')
plt.legend(title="Species", loc='center left', bbox_to_anchor=(1, 0.5))
# Plotting the centroids of the clusters
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[
:, 1], s=100, marker="v", c='red', label='Centroids')
plt.legend(title="Species", loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()
from sklearn.cluster import KMeans
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
np.random.seed(100)
num_data = 50
x11 = np.linspace(0.3, 0.7, 20)
x12 = np.linspace(1.3, 1.8, 15)
x13 = np.linspace(2.4, 3, 15)
x1 = np.concatenate((x11, x12, x13), axis=None)
error = np.random.normal(1, 0.5, num_data)
x2 = 1.5*x1+2+error
fig = plt.figure(figsize=(7, 7))
fig.set_facecolor('white')
plt.scatter(x1, x2, color='k')
plt.xlabel('x1')
plt.ylabel('x2')
plt.show()
X = np.stack([x1, x2], axis=1)
init = np.array([[2., 4.], [1., 5.], [2.5, 6.]])
kmeans = KMeans(n_clusters=3, init=init)
kmeans.fit(X)
labels = kmeans.labels_
def get_silhouette_results(X, labels):
def get_sum_distance(target_x, target_cluster):
res = np.sum([np.linalg.norm(target_x-x) for x in target_cluster])
return res
    '''
    For each data point, compute a(i) and b(i),
    then the per-point silhouette value s(i).
    Finally, compute the Silhouette Coefficient.
    '''
uniq_labels = np.unique(labels)
silhouette_val_list = []
for i in range(len(labels)):
target_data = X[i]
# calculate a(i)
target_label = labels[i]
target_cluster_data_idx = np.where(labels == target_label)[0]
if len(target_cluster_data_idx) == 1:
silhouette_val_list.append(0)
continue
else:
target_cluster_data = X[target_cluster_data_idx]
temp1 = get_sum_distance(target_data, target_cluster_data)
a_i = temp1/(target_cluster_data.shape[0]-1)
# calculate b(i)
b_i_list = []
label_list = uniq_labels[np.unique(labels) != target_label]
for ll in label_list:
other_cluster_data_idx = np.where(labels == ll)[0]
other_cluster_data = X[other_cluster_data_idx]
temp2 = get_sum_distance(target_data, other_cluster_data)
temp_b_i = temp2/other_cluster_data.shape[0]
b_i_list.append(temp_b_i)
b_i = min(b_i_list)
s_i = (b_i-a_i)/max(a_i, b_i)
silhouette_val_list.append(s_i)
silhouette_coef_list = []
for ul in uniq_labels:
temp3 = np.mean(
[s for s, l in zip(silhouette_val_list, labels) if l == ul])
silhouette_coef_list.append(temp3)
silhouette_coef = max(silhouette_coef_list)
return (silhouette_coef, np.array(silhouette_val_list))
silhouette_coef, silhouette_val_list = get_silhouette_results(X, labels)
print(silhouette_coef)
0.7434423527756951
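As a cross-check (a sketch, not in the original notebook), sklearn's silhouette_samples should reproduce the per-point values; note that the function above returns the maximum of the per-cluster means, whereas sklearn's silhouette_score averages over all points.
from sklearn.metrics import silhouette_samples, silhouette_score
sk_vals = silhouette_samples(X, labels)
print(np.allclose(sk_vals, silhouette_val_list))  # per-point s(i) values should agree
print(silhouette_score(X, labels))  # mean over all points, so it differs from the max-per-cluster value above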
import seaborn as sns
# Sort the silhouette values within each cluster
uniq_labels = np.unique(labels)
sorted_cluster_svl = []
rearr_labels = []
for ul in uniq_labels:
labels_idx = np.where(labels == ul)[0]
target_svl = silhouette_val_list[labels_idx]
sorted_cluster_svl += sorted(target_svl)
rearr_labels += [ul]*len(target_svl)
colors = sns.color_palette('hls', len(uniq_labels))
color_labels = [colors[i] for i in rearr_labels]
fig = plt.figure(figsize=(6, 10))
fig.set_facecolor('white')
plt.barh(range(len(sorted_cluster_svl)),
sorted_cluster_svl, color=color_labels)
plt.ylabel('Data Index')
plt.xlabel('Silhouette Value')
plt.axvline(x=np.mean(sorted_cluster_svl),
color='black', label='avg. Silhouette score')
plt.text(np.mean(sorted_cluster_svl)+0.02, -1,
'Avg. \nSilhouette score='+str(round(np.mean(sorted_cluster_svl), 4)))
plt.show()
X = data_iris_qaoa
labels = np.array(xbest_brute)
silhouette_coef, silhouette_val_list = get_silhouette_results(X, labels)
print(silhouette_coef)
0.8616729348759894
import seaborn as sns
# Sort the silhouette values within each cluster
uniq_labels = np.unique(labels)
sorted_cluster_svl = []
rearr_labels = []
for ul in uniq_labels:
labels_idx = np.where(labels == ul)[0]
target_svl = silhouette_val_list[labels_idx]
sorted_cluster_svl += sorted(target_svl)
rearr_labels += [ul]*len(target_svl)
colors = sns.color_palette('hls', len(uniq_labels))
color_labels = [colors[i] for i in rearr_labels]
fig = plt.figure(figsize=(6, 10))
fig.set_facecolor('white')
plt.barh(range(len(sorted_cluster_svl)),
sorted_cluster_svl, color=color_labels)
plt.title('Silhouette Score of Brute Force')
plt.ylabel('Data Index')
plt.xlabel('Silhouette Value')
plt.axvline(x=np.mean(sorted_cluster_svl),
color='black', label='avg. Silhouette score')
plt.text(np.mean(sorted_cluster_svl)+0.02, -1,
'Avg. \nSilhouette score='+str(round(np.mean(sorted_cluster_svl), 4)))
plt.show()
X = data_iris_qaoa
labels = np.array(xbest_qaoa)
silhouette_coef, silhouette_val_list = get_silhouette_results(X, labels)
print(silhouette_coef)
0.8616729348759894
import seaborn as sns
# Sort the silhouette values within each cluster
uniq_labels = np.unique(labels)
sorted_cluster_svl = []
rearr_labels = []
for ul in uniq_labels:
labels_idx = np.where(labels == ul)[0]
target_svl = silhouette_val_list[labels_idx]
sorted_cluster_svl += sorted(target_svl)
rearr_labels += [ul]*len(target_svl)
colors = sns.color_palette('hls', len(uniq_labels))
color_labels = [colors[i] for i in rearr_labels]
fig = plt.figure(figsize=(6, 10))
fig.set_facecolor('white')
plt.barh(range(len(sorted_cluster_svl)),
sorted_cluster_svl, color=color_labels)
plt.title('Silhouette Score of QAOA')
plt.ylabel('Data Index')
plt.xlabel('Silhouette Value')
plt.axvline(x=np.mean(sorted_cluster_svl),
color='black', label='avg. Silhouette score')
plt.text(np.mean(sorted_cluster_svl)+0.02, -1,
'Avg. \nSilhouette score='+str(round(np.mean(sorted_cluster_svl), 4)))
plt.show()
X = data_iris_qaoa
labels = y_kmeans
silhouette_coef, silhouette_val_list = get_silhouette_results(X, labels)
print(silhouette_coef)
0.8616729348759894
type(xbest_brute)
list
import seaborn as sns
# Sort the silhouette values within each cluster
uniq_labels = np.unique(labels)
sorted_cluster_svl = []
rearr_labels = []
for ul in uniq_labels:
labels_idx = np.where(labels == ul)[0]
target_svl = silhouette_val_list[labels_idx]
sorted_cluster_svl += sorted(target_svl)
rearr_labels += [ul]*len(target_svl)
colors = sns.color_palette('hls', len(uniq_labels))
color_labels = [colors[i] for i in rearr_labels]
fig = plt.figure(figsize=(6, 10))
fig.set_facecolor('white')
plt.barh(range(len(sorted_cluster_svl)),
sorted_cluster_svl, color=color_labels)
plt.title('Silhouette Score of K-Means')
plt.ylabel('Data Index')
plt.xlabel('Silhouette Value')
plt.axvline(x=np.mean(sorted_cluster_svl),
color='black', label='avg. Silhouette score')
plt.text(np.mean(sorted_cluster_svl)+0.02, -1,
'Avg. \nSilhouette score='+str(round(np.mean(sorted_cluster_svl), 4)))
plt.show()
data_iris_qaoa_label
data_iris_qaoa_label2 = np.where(
data_iris_qaoa_label > 1, 1, data_iris_qaoa_label)
data_iris_qaoa_label2
array([0, 0, 0, 0, 1, 1, 1, 1])
X = data_iris_qaoa
labels = data_iris_qaoa_label2
silhouette_coef, silhouette_val_list = get_silhouette_results(X, labels)
print(silhouette_coef)
0.8616729348759894
import seaborn as sns
# Sort the silhouette values within each cluster
uniq_labels = np.unique(labels)
sorted_cluster_svl = []
rearr_labels = []
for ul in uniq_labels:
labels_idx = np.where(labels == ul)[0]
target_svl = silhouette_val_list[labels_idx]
sorted_cluster_svl += sorted(target_svl)
rearr_labels += [ul]*len(target_svl)
colors = sns.color_palette('hls', len(uniq_labels))
color_labels = [colors[i] for i in rearr_labels]
fig = plt.figure(figsize=(6, 10))
fig.set_facecolor('white')
plt.barh(range(len(sorted_cluster_svl)),
sorted_cluster_svl, color=color_labels)
plt.title('Silhouette Score of True Labels')
plt.ylabel('Data Index')
plt.xlabel('Silhouette Value')
plt.axvline(x=np.mean(sorted_cluster_svl),
color='black', label='avg. Silhouette score')
plt.text(np.mean(sorted_cluster_svl)+0.02, -1,
'Avg. \nSilhouette score='+str(round(np.mean(sorted_cluster_svl), 4)))
plt.show()
import numpy as np
import random
import matplotlib.pyplot as plt
from itertools import combinations
np.random.seed(100)
num_data = 20
x1 = np.linspace(0.3, 0.7, num_data)
error = np.random.normal(1, 0.5, num_data)
x2 = 1.5*x1+2+error
X = np.stack([x1, x2], axis=1)
X
array([[0.3 , 2.57511726],
[0.32105263, 3.65291915],
[0.34210526, 4.0896758 ],
[0.36315789, 3.41851882],
[0.38421053, 4.06697618],
[0.40526316, 3.86500416],
[0.42631579, 3.75006352],
[0.44736842, 3.13603097],
[0.46842105, 3.60788366],
[0.48947368, 3.86171125],
[0.51052632, 3.53677598],
[0.53157895, 4.01495017],
[0.55263158, 3.53714984],
[0.57368421, 4.26894985],
[0.59473684, 4.22846567],
[0.61578947, 3.87147864],
[0.63684211, 3.68962297],
[0.65789474, 4.50170845],
[0.67894737, 3.79935324],
[0.7 , 3.49084088]])
fig = plt.figure(figsize=(7, 7))
fig.set_facecolor('white')
plt.scatter(x1, x2, color='k')
plt.xlabel('x1')
plt.ylabel('x2')
plt.show()
def complete_diameter_distance(X):
res = []
for i, j in combinations(range(X.shape[0]), 2):
a_i = X[i, :]
a_j = X[j, :]
res.append(np.linalg.norm(a_i-a_j))
return np.max(res)
complete_diameter_distance(X)
1.9595515390777758
def average_diameter_distance(X):
res = []
for i, j in combinations(range(X.shape[0]), 2):
a_i = X[i, :]
a_j = X[j, :]
res.append(np.linalg.norm(a_i-a_j))
return np.mean(res)
average_diameter_distance(X)
0.5159584769337329
def centroid_diameter_distance(X):
center = np.mean(X, axis=0)
res = 2*np.mean([np.linalg.norm(x-center) for x in X])
return res
centroid_diameter_distance(X)
0.6874635793987067
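The three diameter (intra-cluster) distances above can also be computed without explicit Python loops; a minimal vectorized sketch, assuming SciPy is available, reproduces the same three values:
from scipy.spatial.distance import pdist
d = pdist(X)                                         # all pairwise Euclidean distances
print(d.max())                                       # complete diameter distance
print(d.mean())                                      # average diameter distance
center = X.mean(axis=0)
print(2*np.linalg.norm(X - center, axis=1).mean())   # centroid diameter distance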
np.random.seed(100)
x11 = np.linspace(0.3, 0.7, 20)
label1 = [0]*len(x11)
x12 = np.linspace(1.3, 1.8, 15)
label2 = [1]*len(x12)
error = np.random.normal(1, 0.5, 35)
x1 = np.concatenate((x11, x12), axis=None)
x2 = 1.5*x1+2+error
labels = label1+label2
X = np.stack((x1, x2), axis=1)
labels = np.array(labels)
X1 = X[np.where(labels == 0)[0], :]
X2 = X[np.where(labels == 1)[0], :]
fig = plt.figure(figsize=(7, 7))
fig.set_facecolor('white')
for i, x in enumerate(X):
if labels[i] == 0:
plt.scatter(x[0], x[1], color='blue')
else:
plt.scatter(x[0], x[1], color='red')
plt.xlabel('x1')
plt.ylabel('x2')
plt.show()
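The per-point loop above can be replaced by a single scatter call (a sketch; it relies on labels being a 0/1 integer array so the 'bwr' colormap maps cluster 0 to blue and cluster 1 to red):
fig = plt.figure(figsize=(7, 7))
fig.set_facecolor('white')
plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='bwr')
plt.xlabel('x1')
plt.ylabel('x2')
plt.show()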
def single_linkage_distance(X1, X2):
res = []
for x1 in X1:
for x2 in X2:
res.append(np.linalg.norm(x1-x2))
return np.min(res)
single_linkage_distance(X1, X2)
0.7724228550378145
def complete_linkage_distance(X1, X2):
res = []
for x1 in X1:
for x2 in X2:
res.append(np.linalg.norm(x1-x2))
return np.max(res)
complete_linkage_distance(X1, X2)
3.807983171195838
def average_linkage_distance(X1, X2):
res = []
for x1 in X1:
for x2 in X2:
res.append(np.linalg.norm(x1-x2))
return np.mean(res)
average_linkage_distance(X1, X2)
2.0502961616379003
def centroid_linkage_distance(X1, X2):
center1 = np.mean(X1, axis=0)
center2 = np.mean(X2, axis=0)
return np.linalg.norm(center1-center2)
centroid_linkage_distance(X1, X2)
2.023846293346597
def average_of_centroids_linkage_distance(X1, X2):
center1 = np.mean(X1, axis=0)
center2 = np.mean(X2, axis=0)
res = []
for x1 in X1:
res.append(np.linalg.norm(x1-center2))
for x2 in X2:
res.append(np.linalg.norm(x2-center1))
return np.mean(res)
average_of_centroids_linkage_distance(X1, X2)
2.035733790732974
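Likewise, the five linkage (inter-cluster) distances can be computed from one cross-distance matrix; a vectorized sketch, assuming SciPy is available and X1, X2 are the two cluster arrays above:
from scipy.spatial.distance import cdist
D = cdist(X1, X2)                     # all cross-cluster Euclidean distances
print(D.min())                        # single linkage distance
print(D.max())                        # complete linkage distance
print(D.mean())                       # average linkage distance
c1, c2 = X1.mean(axis=0), X2.mean(axis=0)
print(np.linalg.norm(c1 - c2))        # centroid linkage distance
print(np.mean(np.concatenate([np.linalg.norm(X1 - c2, axis=1),
                              np.linalg.norm(X2 - c1, axis=1)])))  # average of centroids linkage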
np.random.seed(100)
num_data = 50
x11 = np.linspace(0.3, 0.7, 20)
label1 = [0]*len(x11)
x12 = np.linspace(1.3, 1.8, 15)
label2 = [1]*len(x12)
x13 = np.linspace(2.4, 3, 15)
label3 = [2]*len(x13)
x1 = np.concatenate((x11, x12, x13), axis=None)
error = np.random.normal(1, 0.5, num_data)
x2 = 1.5*x1+2+error
X = np.stack((x1, x2), axis=1)
labels = np.array(label1+label2+label3)
fig = plt.figure(figsize=(7, 7))
fig.set_facecolor('white')
for i, x in enumerate(X):
if labels[i] == 0:
plt.scatter(x[0], x[1], color='blue')
elif labels[i] == 1:
plt.scatter(x[0], x[1], color='red')
else:
plt.scatter(x[0], x[1], color='green')
plt.xlabel('x1')
plt.ylabel('x2')
plt.show()
def Dunn_index(X, labels, intra_cluster_distance_type, inter_cluster_distance_type):
intra_cdt_dict = {
'cmpl_dd': complete_diameter_distance,
'avdd': average_diameter_distance,
'cent_dd': centroid_diameter_distance
}
inter_cdt_dict = {
'sld': single_linkage_distance,
'cmpl_ld': complete_linkage_distance,
'avld': average_linkage_distance,
'cent_ld': centroid_linkage_distance,
'av_cent_ld': average_of_centroids_linkage_distance
}
# intra cluster distance
intra_cluster_distance = intra_cdt_dict[intra_cluster_distance_type]
# inter cluster distance
inter_cluster_distance = inter_cdt_dict[inter_cluster_distance_type]
# get minimum value of inter_cluster_distance
res1 = []
for i, j in combinations(np.unique(labels), 2):
X1 = X[np.where(labels == i)[0], :]
X2 = X[np.where(labels == j)[0], :]
res1.append(inter_cluster_distance(X1, X2))
min_inter_cd = np.min(res1)
# get maximum value of intra_cluster_distance
res2 = []
for label in np.unique(labels):
X_target = X[np.where(labels == label)[0], :]
if X_target.shape[0] >= 2:
res2.append(intra_cluster_distance(X_target))
else:
res2.append(0)
max_intra_cd = np.max(res2)
Dunn_idx = min_inter_cd/max_intra_cd
return Dunn_idx
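The Dunn index is simply the smallest inter-cluster distance divided by the largest intra-cluster diameter. As a sanity check on the three-cluster toy data above, the sketch below unrolls that ratio for one combination (average diameter distance vs. complete linkage distance), reusing the helper functions already defined:
clusters = [X[labels == k] for k in np.unique(labels)]
min_inter = min(complete_linkage_distance(A, B)
                for A, B in combinations(clusters, 2))
max_intra = max(average_diameter_distance(C) for C in clusters)
print(min_inter / max_intra)  # should equal Dunn_index(X, labels, 'avdd', 'cmpl_ld')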
intra_cluster_distance_type = ['cmpl_dd', 'avdd', 'cent_dd']
inter_cluster_distance_type = [
'sld', 'cmpl_ld', 'avld', 'cent_ld', 'av_cent_ld']
for i in range(len(intra_cluster_distance_type)):
for j in range(len(inter_cluster_distance_type)):
print("Dunn Index:", "Intra Cluster Dist.:", '%-10s' % intra_cluster_distance_type[i],
"Inter Cluster Dist.:", '%-12s' % inter_cluster_distance_type[j],
"Dunn Index Valule:", Dunn_index(X, labels, intra_cluster_distance_type[i], inter_cluster_distance_type[j]))
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: sld Dunn Index Value: 0.2780279425885912
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: cmpl_ld Dunn Index Value: 1.5816104606529293
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: avld Dunn Index Value: 0.7627361333011984
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: cent_ld Dunn Index Value: 0.7374610426286897
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: av_cent_ld Dunn Index Value: 0.7486880384584048
Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: sld Dunn Index Value: 0.8233205335652861
Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: cmpl_ld Dunn Index Value: 4.683602504961463
Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: avld Dunn Index Value: 2.2586806002025015
Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: cent_ld Dunn Index Value: 2.1838338026300956
Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: av_cent_ld Dunn Index Value: 2.2170801594919096
Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: sld Dunn Index Value: 0.6140262424157213
Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: cmpl_ld Dunn Index Value: 3.492995412900513
Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: avld Dunn Index Value: 1.6845069510824404
Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: cent_ld Dunn Index Value: 1.6286867741323774
Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: av_cent_ld Dunn Index Value: 1.6534816562537682
import random
import pandas as pd
# Create an empty DataFrame
Dunn_Index_result = pd.DataFrame(
columns=['intra cluster', 'inter cluster', 'Dunn index'])
Dunn_Index_result
| intra cluster | inter cluster | Dunn index |
|---|---|---|
len(inter_cluster_distance_type)
5
for i in range(len(intra_cluster_distance_type)):
for j in range(len(inter_cluster_distance_type)):
Dunn_Index_result.loc[len(inter_cluster_distance_type)
* i+j, 'intra cluster'] = intra_cluster_distance_type[i]
Dunn_Index_result.loc[len(inter_cluster_distance_type)
* i+j, 'inter cluster'] = inter_cluster_distance_type[j]
Dunn_Index_result.loc[len(inter_cluster_distance_type)*i+j, 'Dunn index'] = Dunn_index(
X, labels, intra_cluster_distance_type[i], inter_cluster_distance_type[j])
Dunn_Index_result
| | intra cluster | inter cluster | Dunn index |
|---|---|---|---|
| 0 | cmpl_dd | sld | 0.278028 |
| 1 | cmpl_dd | cmpl_ld | 1.58161 |
| 2 | cmpl_dd | avld | 0.762736 |
| 3 | cmpl_dd | cent_ld | 0.737461 |
| 4 | cmpl_dd | av_cent_ld | 0.748688 |
| 5 | avdd | sld | 0.823321 |
| 6 | avdd | cmpl_ld | 4.683603 |
| 7 | avdd | avld | 2.258681 |
| 8 | avdd | cent_ld | 2.183834 |
| 9 | avdd | av_cent_ld | 2.21708 |
| 10 | cent_dd | sld | 0.614026 |
| 11 | cent_dd | cmpl_ld | 3.492995 |
| 12 | cent_dd | avld | 1.684507 |
| 13 | cent_dd | cent_ld | 1.628687 |
| 14 | cent_dd | av_cent_ld | 1.653482 |
Dunn_Index_result[Dunn_Index_result['Dunn index']
== Dunn_Index_result['Dunn index'].max()]
| | intra cluster | inter cluster | Dunn index |
|---|---|---|---|
| 6 | avdd | cmpl_ld | 4.683603 |
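For reference, the same result table can be built in one step instead of cell-by-cell .loc assignments (a sketch, assuming the distance-type lists and the Dunn_index function from the cells above, with X and labels still set to the three-cluster toy data):
Dunn_Index_result = pd.DataFrame(
    [{'intra cluster': a, 'inter cluster': b,
      'Dunn index': Dunn_index(X, labels, a, b)}
     for a in intra_cluster_distance_type
     for b in inter_cluster_distance_type])
# Row with the largest Dunn index
print(Dunn_Index_result.loc[Dunn_Index_result['Dunn index'].idxmax()])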
X = data_iris_qaoa
labels = np.array(xbest_brute)
for i in range(len(intra_cluster_distance_type)):
for j in range(len(inter_cluster_distance_type)):
print("Dunn Index:", "Intra Cluster Dist.:", '%-10s' % intra_cluster_distance_type[i],
"Inter Cluster Dist.:", '%-12s' % inter_cluster_distance_type[j],
"Dunn Index Valule:", Dunn_index(X, labels, intra_cluster_distance_type[i], inter_cluster_distance_type[j]))
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: sld Dunn Index Value: 2.380901721852151
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: cmpl_ld Dunn Index Value: 2.9277002188455987
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: avld Dunn Index Value: 2.646978455013341
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: cent_ld Dunn Index Value: 2.613022682257832
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: av_cent_ld Dunn Index Value: 2.6300584339086135
Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: sld Dunn Index Value: 3.9627734577479075
Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: cmpl_ld Dunn Index Value: 4.872865021265471
Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: avld Dunn Index Value: 4.405631643038814
Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: cent_ld Dunn Index Value: 4.349115645852598
Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: av_cent_ld Dunn Index Value: 4.377469955421489
Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: sld Dunn Index Value: 3.3896651116226946
Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: cmpl_ld Dunn Index Value: 4.1681364661247065
Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: avld Dunn Index Value: 3.768475799662944
Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: cent_ld Dunn Index Value: 3.7201333178245926
Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: av_cent_ld Dunn Index Value: 3.7443869409334036
# Create an empty DataFrame
Dunn_Index_result_brute = pd.DataFrame(
columns=['intra cluster', 'inter cluster', 'Dunn index'])
Dunn_Index_result_brute
| intra cluster | inter cluster | Dunn index |
|---|---|---|
for i in range(len(intra_cluster_distance_type)):
for j in range(len(inter_cluster_distance_type)):
Dunn_Index_result_brute.loc[len(inter_cluster_distance_type)
* i+j, 'intra cluster'] = intra_cluster_distance_type[i]
Dunn_Index_result_brute.loc[len(inter_cluster_distance_type)
* i+j, 'inter cluster'] = inter_cluster_distance_type[j]
Dunn_Index_result_brute.loc[len(inter_cluster_distance_type)*i+j, 'Dunn index'] = Dunn_index(
X, labels, intra_cluster_distance_type[i], inter_cluster_distance_type[j])
Dunn_Index_result_brute
| | intra cluster | inter cluster | Dunn index |
|---|---|---|---|
| 0 | cmpl_dd | sld | 2.380902 |
| 1 | cmpl_dd | cmpl_ld | 2.9277 |
| 2 | cmpl_dd | avld | 2.646978 |
| 3 | cmpl_dd | cent_ld | 2.613023 |
| 4 | cmpl_dd | av_cent_ld | 2.630058 |
| 5 | avdd | sld | 3.962773 |
| 6 | avdd | cmpl_ld | 4.872865 |
| 7 | avdd | avld | 4.405632 |
| 8 | avdd | cent_ld | 4.349116 |
| 9 | avdd | av_cent_ld | 4.37747 |
| 10 | cent_dd | sld | 3.389665 |
| 11 | cent_dd | cmpl_ld | 4.168136 |
| 12 | cent_dd | avld | 3.768476 |
| 13 | cent_dd | cent_ld | 3.720133 |
| 14 | cent_dd | av_cent_ld | 3.744387 |
Dunn_Index_result_brute[Dunn_Index_result_brute['Dunn index']
== Dunn_Index_result_brute['Dunn index'].max()]
| | intra cluster | inter cluster | Dunn index |
|---|---|---|---|
| 6 | avdd | cmpl_ld | 4.872865 |
X = data_iris_qaoa
labels = np.array(xbest_qaoa)
for i in range(len(intra_cluster_distance_type)):
for j in range(len(inter_cluster_distance_type)):
print("Dunn Index:", "Intra Cluster Dist.:", '%-10s' % intra_cluster_distance_type[i],
"Inter Cluster Dist.:", '%-12s' % inter_cluster_distance_type[j],
"Dunn Index Valule:", Dunn_index(X, labels, intra_cluster_distance_type[i], inter_cluster_distance_type[j]))
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: sld Dunn Index Value: 2.380901721852151
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: cmpl_ld Dunn Index Value: 2.9277002188455987
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: avld Dunn Index Value: 2.646978455013341
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: cent_ld Dunn Index Value: 2.613022682257832
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: av_cent_ld Dunn Index Value: 2.6300584339086135
Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: sld Dunn Index Value: 3.9627734577479075
Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: cmpl_ld Dunn Index Value: 4.872865021265471
Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: avld Dunn Index Value: 4.405631643038814
Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: cent_ld Dunn Index Value: 4.349115645852598
Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: av_cent_ld Dunn Index Value: 4.377469955421489
Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: sld Dunn Index Value: 3.3896651116226946
Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: cmpl_ld Dunn Index Value: 4.1681364661247065
Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: avld Dunn Index Value: 3.768475799662944
Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: cent_ld Dunn Index Value: 3.7201333178245926
Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: av_cent_ld Dunn Index Value: 3.7443869409334036
# Create an empty DataFrame
Dunn_Index_result_qaoa = pd.DataFrame(
columns=['intra cluster', 'inter cluster', 'Dunn index'])
Dunn_Index_result_qaoa
| intra cluster | inter cluster | Dunn index |
|---|---|---|
for i in range(len(intra_cluster_distance_type)):
for j in range(len(inter_cluster_distance_type)):
Dunn_Index_result_qaoa.loc[len(inter_cluster_distance_type)
* i+j, 'intra cluster'] = intra_cluster_distance_type[i]
Dunn_Index_result_qaoa.loc[len(inter_cluster_distance_type)
* i+j, 'inter cluster'] = inter_cluster_distance_type[j]
Dunn_Index_result_qaoa.loc[len(inter_cluster_distance_type)*i+j, 'Dunn index'] = Dunn_index(
X, labels, intra_cluster_distance_type[i], inter_cluster_distance_type[j])
Dunn_Index_result_qaoa
| | intra cluster | inter cluster | Dunn index |
|---|---|---|---|
| 0 | cmpl_dd | sld | 2.380902 |
| 1 | cmpl_dd | cmpl_ld | 2.9277 |
| 2 | cmpl_dd | avld | 2.646978 |
| 3 | cmpl_dd | cent_ld | 2.613023 |
| 4 | cmpl_dd | av_cent_ld | 2.630058 |
| 5 | avdd | sld | 3.962773 |
| 6 | avdd | cmpl_ld | 4.872865 |
| 7 | avdd | avld | 4.405632 |
| 8 | avdd | cent_ld | 4.349116 |
| 9 | avdd | av_cent_ld | 4.37747 |
| 10 | cent_dd | sld | 3.389665 |
| 11 | cent_dd | cmpl_ld | 4.168136 |
| 12 | cent_dd | avld | 3.768476 |
| 13 | cent_dd | cent_ld | 3.720133 |
| 14 | cent_dd | av_cent_ld | 3.744387 |
Dunn_Index_result_qaoa[Dunn_Index_result_qaoa['Dunn index']
== Dunn_Index_result_qaoa['Dunn index'].max()]
| | intra cluster | inter cluster | Dunn index |
|---|---|---|---|
| 6 | avdd | cmpl_ld | 4.872865 |
X = data_iris_qaoa
labels = y_kmeans
for i in range(len(intra_cluster_distance_type)):
for j in range(len(inter_cluster_distance_type)):
print("Dunn Index:", "Intra Cluster Dist.:", '%-10s' % intra_cluster_distance_type[i],
"Inter Cluster Dist.:", '%-12s' % inter_cluster_distance_type[j],
"Dunn Index Valule:", Dunn_index(X, labels, intra_cluster_distance_type[i], inter_cluster_distance_type[j]))
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: sld Dunn Index Value: 2.380901721852151
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: cmpl_ld Dunn Index Value: 2.9277002188455987
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: avld Dunn Index Value: 2.646978455013341
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: cent_ld Dunn Index Value: 2.613022682257832
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: av_cent_ld Dunn Index Value: 2.6300584339086135
Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: sld Dunn Index Value: 3.9627734577479075
Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: cmpl_ld Dunn Index Value: 4.872865021265471
Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: avld Dunn Index Value: 4.405631643038814
Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: cent_ld Dunn Index Value: 4.349115645852598
Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: av_cent_ld Dunn Index Value: 4.377469955421489
Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: sld Dunn Index Value: 3.3896651116226946
Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: cmpl_ld Dunn Index Value: 4.1681364661247065
Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: avld Dunn Index Value: 3.768475799662944
Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: cent_ld Dunn Index Value: 3.7201333178245926
Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: av_cent_ld Dunn Index Value: 3.7443869409334036
# Create an empty DataFrame
Dunn_Index_result_kmeans = pd.DataFrame(
columns=['intra cluster', 'inter cluster', 'Dunn index'])
Dunn_Index_result_kmeans
| intra cluster | inter cluster | Dunn index |
|---|---|---|
for i in range(len(intra_cluster_distance_type)):
for j in range(len(inter_cluster_distance_type)):
Dunn_Index_result_kmeans.loc[len(inter_cluster_distance_type)
* i+j, 'intra cluster'] = intra_cluster_distance_type[i]
Dunn_Index_result_kmeans.loc[len(inter_cluster_distance_type)
* i+j, 'inter cluster'] = inter_cluster_distance_type[j]
Dunn_Index_result_kmeans.loc[len(inter_cluster_distance_type)*i+j, 'Dunn index'] = Dunn_index(
X, labels, intra_cluster_distance_type[i], inter_cluster_distance_type[j])
Dunn_Index_result_kmeans
| | intra cluster | inter cluster | Dunn index |
|---|---|---|---|
| 0 | cmpl_dd | sld | 2.380902 |
| 1 | cmpl_dd | cmpl_ld | 2.9277 |
| 2 | cmpl_dd | avld | 2.646978 |
| 3 | cmpl_dd | cent_ld | 2.613023 |
| 4 | cmpl_dd | av_cent_ld | 2.630058 |
| 5 | avdd | sld | 3.962773 |
| 6 | avdd | cmpl_ld | 4.872865 |
| 7 | avdd | avld | 4.405632 |
| 8 | avdd | cent_ld | 4.349116 |
| 9 | avdd | av_cent_ld | 4.37747 |
| 10 | cent_dd | sld | 3.389665 |
| 11 | cent_dd | cmpl_ld | 4.168136 |
| 12 | cent_dd | avld | 3.768476 |
| 13 | cent_dd | cent_ld | 3.720133 |
| 14 | cent_dd | av_cent_ld | 3.744387 |
Dunn_Index_result_kmeans[Dunn_Index_result_kmeans['Dunn index']
== Dunn_Index_result_kmeans['Dunn index'].max()]
| | intra cluster | inter cluster | Dunn index |
|---|---|---|---|
| 6 | avdd | cmpl_ld | 4.872865 |
X = data_iris_qaoa
labels = data_iris_qaoa_label2
for i in range(len(intra_cluster_distance_type)):
for j in range(len(inter_cluster_distance_type)):
print("Dunn Index:", "Intra Cluster Dist.:", '%-10s' % intra_cluster_distance_type[i],
"Inter Cluster Dist.:", '%-12s' % inter_cluster_distance_type[j],
"Dunn Index Valule:", Dunn_index(X, labels, intra_cluster_distance_type[i], inter_cluster_distance_type[j]))
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: sld Dunn Index Value: 2.380901721852151
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: cmpl_ld Dunn Index Value: 2.9277002188455987
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: avld Dunn Index Value: 2.646978455013341
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: cent_ld Dunn Index Value: 2.613022682257832
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: av_cent_ld Dunn Index Value: 2.6300584339086135
Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: sld Dunn Index Value: 3.9627734577479075
Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: cmpl_ld Dunn Index Value: 4.872865021265471
Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: avld Dunn Index Value: 4.405631643038814
Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: cent_ld Dunn Index Value: 4.349115645852598
Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: av_cent_ld Dunn Index Value: 4.377469955421489
Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: sld Dunn Index Value: 3.3896651116226946
Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: cmpl_ld Dunn Index Value: 4.1681364661247065
Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: avld Dunn Index Value: 3.768475799662944
Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: cent_ld Dunn Index Value: 3.7201333178245926
Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: av_cent_ld Dunn Index Value: 3.7443869409334036
# Create an empty DataFrame
Dunn_Index_result_truelabel = pd.DataFrame(
columns=['intra cluster', 'inter cluster', 'Dunn index'])
Dunn_Index_result_truelabel
| intra cluster | inter cluster | Dunn index |
|---|---|---|
for i in range(len(intra_cluster_distance_type)):
for j in range(len(inter_cluster_distance_type)):
Dunn_Index_result_truelabel.loc[len(inter_cluster_distance_type)
* i+j, 'intra cluster'] = intra_cluster_distance_type[i]
Dunn_Index_result_truelabel.loc[len(inter_cluster_distance_type)
* i+j, 'inter cluster'] = inter_cluster_distance_type[j]
Dunn_Index_result_truelabel.loc[len(inter_cluster_distance_type)*i+j, 'Dunn index'] = Dunn_index(
X, labels, intra_cluster_distance_type[i], inter_cluster_distance_type[j])
Dunn_Index_result_truelabel
| | intra cluster | inter cluster | Dunn index |
|---|---|---|---|
| 0 | cmpl_dd | sld | 2.380902 |
| 1 | cmpl_dd | cmpl_ld | 2.9277 |
| 2 | cmpl_dd | avld | 2.646978 |
| 3 | cmpl_dd | cent_ld | 2.613023 |
| 4 | cmpl_dd | av_cent_ld | 2.630058 |
| 5 | avdd | sld | 3.962773 |
| 6 | avdd | cmpl_ld | 4.872865 |
| 7 | avdd | avld | 4.405632 |
| 8 | avdd | cent_ld | 4.349116 |
| 9 | avdd | av_cent_ld | 4.37747 |
| 10 | cent_dd | sld | 3.389665 |
| 11 | cent_dd | cmpl_ld | 4.168136 |
| 12 | cent_dd | avld | 3.768476 |
| 13 | cent_dd | cent_ld | 3.720133 |
| 14 | cent_dd | av_cent_ld | 3.744387 |
Dunn_Index_result_truelabel[Dunn_Index_result_truelabel['Dunn index']
== Dunn_Index_result_truelabel['Dunn index'].max()]
| | intra cluster | inter cluster | Dunn index |
|---|---|---|---|
| 6 | avdd | cmpl_ld | 4.872865 |
Dunn_Index_results_combined = Dunn_Index_result_brute[[
    'intra cluster', 'inter cluster']].copy()  # .copy() avoids pandas' SettingWithCopyWarning when adding columns below
Dunn_Index_results_combined['Dunn index - Brute'] = Dunn_Index_result_brute['Dunn index']
Dunn_Index_results_combined['Dunn index - QAOA'] = Dunn_Index_result_qaoa['Dunn index']
Dunn_Index_results_combined['Dunn index - K-Means'] = Dunn_Index_result_kmeans['Dunn index']
Dunn_Index_results_combined['Dunn index - True label'] = Dunn_Index_result_truelabel['Dunn index']
Dunn_Index_results_combined
| | intra cluster | inter cluster | Dunn index - Brute | Dunn index - QAOA | Dunn index - K-Means | Dunn index - True label |
|---|---|---|---|---|---|---|
| 0 | cmpl_dd | sld | 2.380902 | 2.380902 | 2.380902 | 2.380902 |
| 1 | cmpl_dd | cmpl_ld | 2.9277 | 2.9277 | 2.9277 | 2.9277 |
| 2 | cmpl_dd | avld | 2.646978 | 2.646978 | 2.646978 | 2.646978 |
| 3 | cmpl_dd | cent_ld | 2.613023 | 2.613023 | 2.613023 | 2.613023 |
| 4 | cmpl_dd | av_cent_ld | 2.630058 | 2.630058 | 2.630058 | 2.630058 |
| 5 | avdd | sld | 3.962773 | 3.962773 | 3.962773 | 3.962773 |
| 6 | avdd | cmpl_ld | 4.872865 | 4.872865 | 4.872865 | 4.872865 |
| 7 | avdd | avld | 4.405632 | 4.405632 | 4.405632 | 4.405632 |
| 8 | avdd | cent_ld | 4.349116 | 4.349116 | 4.349116 | 4.349116 |
| 9 | avdd | av_cent_ld | 4.37747 | 4.37747 | 4.37747 | 4.37747 |
| 10 | cent_dd | sld | 3.389665 | 3.389665 | 3.389665 | 3.389665 |
| 11 | cent_dd | cmpl_ld | 4.168136 | 4.168136 | 4.168136 | 4.168136 |
| 12 | cent_dd | avld | 3.768476 | 3.768476 | 3.768476 | 3.768476 |
| 13 | cent_dd | cent_ld | 3.720133 | 3.720133 | 3.720133 | 3.720133 |
| 14 | cent_dd | av_cent_ld | 3.744387 | 3.744387 | 3.744387 | 3.744387 |
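All four labelings give identical Dunn index values for every distance combination (and identical silhouette coefficients earlier), which is consistent with the brute-force, QAOA, k-means, and true-label assignments partitioning the 8 selected points identically up to a relabeling of the clusters. One quick way to check that directly is the adjusted Rand index (a sketch, assuming the label arrays from the cells above are still in scope):
from sklearn.metrics import adjusted_rand_score
for name, lab in [('Brute', np.array(xbest_brute)),
                  ('QAOA', np.array(xbest_qaoa)),
                  ('K-Means', y_kmeans)]:
    # adjusted_rand_score equals 1.0 exactly when the two partitions coincide (label names may differ)
    print(name, adjusted_rand_score(data_iris_qaoa_label2, lab))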